This is a pipeline for processing data from 16S amplicon sequencing. The pipeline utilises ASVs and will in the analysis step use rarefaction as normalization method.
Set up environment with names of files to be scanned in.
samples <- scan("/Users/magdalenapolder/Documents/examensarbete/Burkina_faso copy/samples", what="character")
Read 35 items
forward_reads <- paste0("/Users/magdalenapolder/Documents/examensarbete/Burkina_faso copy/",samples,".R1.fastq")
reverse_reads <- paste0("/Users/magdalenapolder/Documents/examensarbete/Burkina_faso copy/",samples,".R2.fastq")
filtered_forward_reads <- paste0("/Users/magdalenapolder/Documents/examensarbete/Burkina_faso copy/",samples, "_sub_R1_filtered.fq.gz")
filtered_reverse_reads <- paste0("/Users/magdalenapolder/Documents/examensarbete/Burkina_faso copy/",samples, "_sub_R2_filtered.fq.gz")
Plot Quality of reads before filtering
library(dada2)
plotQualityProfile(forward_reads)
plotQualityProfile(reverse_reads)
Filter the reads. Obs! Make sure that there is enough overlap between the forward and resverse so it’s possible to merge in later steps. An overlap of 20bp is good. Then plot the new quality after trimming.
filtered_out <- filterAndTrim(forward_reads, filtered_forward_reads,
reverse_reads, filtered_reverse_reads,
maxEE=c(2,2), truncQ=2, rm.phix=TRUE,
truncLen = c(250,200))
plotQualityProfile(filtered_forward_reads)
plotQualityProfile(filtered_reverse_reads)
Generate error model, dereplicate, infer ASVs, and merge.
err_forward_reads <- learnErrors(filtered_forward_reads)
103038750 total bases in 412155 reads from 17 samples will be used for learning the error rates.
plotErrors(err_forward_reads, nominalQ=TRUE)
err_reverse_reads <- learnErrors(filtered_reverse_reads)
101047200 total bases in 505236 reads from 21 samples will be used for learning the error rates.
plotErrors(err_reverse_reads, nominalQ=TRUE)
derep_forward <- derepFastq(filtered_forward_reads, verbose=TRUE)
Dereplicating sequence entries in Fastq file: /Users/magdalenapolder/Documents/examensarbete/Burkina_faso copy/VK3_41_sub_R1_filtered.fq.gz
Encountered 5183 unique sequences from 15798 total sequences read.
Dereplicating sequence entries in Fastq file: /Users/magdalenapolder/Documents/examensarbete/Burkina_faso copy/VK3_42_sub_R1_filtered.fq.gz
Encountered 4067 unique sequences from 16222 total sequences read.
Dereplicating sequence entries in Fastq file: /Users/magdalenapolder/Documents/examensarbete/Burkina_faso copy/VK3_43_sub_R1_filtered.fq.gz
Encountered 7100 unique sequences from 19130 total sequences read.
Dereplicating sequence entries in Fastq file: /Users/magdalenapolder/Documents/examensarbete/Burkina_faso copy/VK3_44_sub_R1_filtered.fq.gz
Encountered 15112 unique sequences from 51847 total sequences read.
Dereplicating sequence entries in Fastq file: /Users/magdalenapolder/Documents/examensarbete/Burkina_faso copy/VK3_45_sub_R1_filtered.fq.gz
Encountered 13515 unique sequences from 50871 total sequences read.
Dereplicating sequence entries in Fastq file: /Users/magdalenapolder/Documents/examensarbete/Burkina_faso copy/VK3_47_sub_R1_filtered.fq.gz
Encountered 3809 unique sequences from 10022 total sequences read.
Dereplicating sequence entries in Fastq file: /Users/magdalenapolder/Documents/examensarbete/Burkina_faso copy/VK3_48_sub_R1_filtered.fq.gz
Encountered 7367 unique sequences from 28075 total sequences read.
Dereplicating sequence entries in Fastq file: /Users/magdalenapolder/Documents/examensarbete/Burkina_faso copy/VK3_49_sub_R1_filtered.fq.gz
Encountered 3310 unique sequences from 7961 total sequences read.
Dereplicating sequence entries in Fastq file: /Users/magdalenapolder/Documents/examensarbete/Burkina_faso copy/VK3_50_sub_R1_filtered.fq.gz
Encountered 24066 unique sequences from 78392 total sequences read.
Dereplicating sequence entries in Fastq file: /Users/magdalenapolder/Documents/examensarbete/Burkina_faso copy/VK3_51-55_sub_R1_filtered.fq.gz
Encountered 7136 unique sequences from 17480 total sequences read.
Dereplicating sequence entries in Fastq file: /Users/magdalenapolder/Documents/examensarbete/Burkina_faso copy/VK3_56-60_sub_R1_filtered.fq.gz
Encountered 3248 unique sequences from 8601 total sequences read.
Dereplicating sequence entries in Fastq file: /Users/magdalenapolder/Documents/examensarbete/Burkina_faso copy/VK5_1_sub_R1_filtered.fq.gz
Encountered 11280 unique sequences from 40480 total sequences read.
Dereplicating sequence entries in Fastq file: /Users/magdalenapolder/Documents/examensarbete/Burkina_faso copy/VK5_10_sub_R1_filtered.fq.gz
Encountered 2247 unique sequences from 6681 total sequences read.
Dereplicating sequence entries in Fastq file: /Users/magdalenapolder/Documents/examensarbete/Burkina_faso copy/VK5_11-15_sub_R1_filtered.fq.gz
Encountered 4967 unique sequences from 16439 total sequences read.
Dereplicating sequence entries in Fastq file: /Users/magdalenapolder/Documents/examensarbete/Burkina_faso copy/VK5_16-20_sub_R1_filtered.fq.gz
Encountered 4727 unique sequences from 11066 total sequences read.
Dereplicating sequence entries in Fastq file: /Users/magdalenapolder/Documents/examensarbete/Burkina_faso copy/VK5_2_sub_R1_filtered.fq.gz
Encountered 3199 unique sequences from 11148 total sequences read.
Dereplicating sequence entries in Fastq file: /Users/magdalenapolder/Documents/examensarbete/Burkina_faso copy/VK5_3_sub_R1_filtered.fq.gz
Encountered 5959 unique sequences from 21942 total sequences read.
Dereplicating sequence entries in Fastq file: /Users/magdalenapolder/Documents/examensarbete/Burkina_faso copy/VK5_4_sub_R1_filtered.fq.gz
Encountered 3370 unique sequences from 11669 total sequences read.
Dereplicating sequence entries in Fastq file: /Users/magdalenapolder/Documents/examensarbete/Burkina_faso copy/VK5_5_sub_R1_filtered.fq.gz
Encountered 6484 unique sequences from 20400 total sequences read.
Dereplicating sequence entries in Fastq file: /Users/magdalenapolder/Documents/examensarbete/Burkina_faso copy/VK5_6_sub_R1_filtered.fq.gz
Encountered 7033 unique sequences from 24633 total sequences read.
Dereplicating sequence entries in Fastq file: /Users/magdalenapolder/Documents/examensarbete/Burkina_faso copy/VK5_7_sub_R1_filtered.fq.gz
Encountered 9786 unique sequences from 36379 total sequences read.
Dereplicating sequence entries in Fastq file: /Users/magdalenapolder/Documents/examensarbete/Burkina_faso copy/VK5_8_sub_R1_filtered.fq.gz
Encountered 5382 unique sequences from 14496 total sequences read.
Dereplicating sequence entries in Fastq file: /Users/magdalenapolder/Documents/examensarbete/Burkina_faso copy/VK5_9_sub_R1_filtered.fq.gz
Encountered 3770 unique sequences from 11331 total sequences read.
Dereplicating sequence entries in Fastq file: /Users/magdalenapolder/Documents/examensarbete/Burkina_faso copy/VK7_21_sub_R1_filtered.fq.gz
Encountered 2474 unique sequences from 8486 total sequences read.
Dereplicating sequence entries in Fastq file: /Users/magdalenapolder/Documents/examensarbete/Burkina_faso copy/VK7_22_sub_R1_filtered.fq.gz
Encountered 5609 unique sequences from 20891 total sequences read.
Dereplicating sequence entries in Fastq file: /Users/magdalenapolder/Documents/examensarbete/Burkina_faso copy/VK7_23_sub_R1_filtered.fq.gz
Encountered 2343 unique sequences from 7062 total sequences read.
Dereplicating sequence entries in Fastq file: /Users/magdalenapolder/Documents/examensarbete/Burkina_faso copy/VK7_24_sub_R1_filtered.fq.gz
Encountered 16382 unique sequences from 50626 total sequences read.
Dereplicating sequence entries in Fastq file: /Users/magdalenapolder/Documents/examensarbete/Burkina_faso copy/VK7_25_sub_R1_filtered.fq.gz
Encountered 1 unique sequences from 1 total sequences read.
Dereplicating sequence entries in Fastq file: /Users/magdalenapolder/Documents/examensarbete/Burkina_faso copy/VK7_26_sub_R1_filtered.fq.gz
Encountered 10103 unique sequences from 25160 total sequences read.
Dereplicating sequence entries in Fastq file: /Users/magdalenapolder/Documents/examensarbete/Burkina_faso copy/VK7_27_sub_R1_filtered.fq.gz
Encountered 2941 unique sequences from 6829 total sequences read.
Dereplicating sequence entries in Fastq file: /Users/magdalenapolder/Documents/examensarbete/Burkina_faso copy/VK7_28_sub_R1_filtered.fq.gz
Encountered 6182 unique sequences from 18632 total sequences read.
Dereplicating sequence entries in Fastq file: /Users/magdalenapolder/Documents/examensarbete/Burkina_faso copy/VK7_29_sub_R1_filtered.fq.gz
Encountered 1708 unique sequences from 3521 total sequences read.
Dereplicating sequence entries in Fastq file: /Users/magdalenapolder/Documents/examensarbete/Burkina_faso copy/VK7_30_sub_R1_filtered.fq.gz
Encountered 7636 unique sequences from 18075 total sequences read.
Dereplicating sequence entries in Fastq file: /Users/magdalenapolder/Documents/examensarbete/Burkina_faso copy/VK7_31-35_sub_R1_filtered.fq.gz
Encountered 20309 unique sequences from 52833 total sequences read.
Dereplicating sequence entries in Fastq file: /Users/magdalenapolder/Documents/examensarbete/Burkina_faso copy/VK7_36-40_sub_R1_filtered.fq.gz
Encountered 5787 unique sequences from 12305 total sequences read.
names(derep_forward) <- samples # the sample names in these objects are initially the file names of the samples, this sets them to the sample names for the rest of the workflow
derep_reverse <- derepFastq(filtered_reverse_reads, verbose=TRUE)
Dereplicating sequence entries in Fastq file: /Users/magdalenapolder/Documents/examensarbete/Burkina_faso copy/VK3_41_sub_R2_filtered.fq.gz
Encountered 5898 unique sequences from 15798 total sequences read.
Dereplicating sequence entries in Fastq file: /Users/magdalenapolder/Documents/examensarbete/Burkina_faso copy/VK3_42_sub_R2_filtered.fq.gz
Encountered 5083 unique sequences from 16222 total sequences read.
Dereplicating sequence entries in Fastq file: /Users/magdalenapolder/Documents/examensarbete/Burkina_faso copy/VK3_43_sub_R2_filtered.fq.gz
Encountered 7588 unique sequences from 19130 total sequences read.
Dereplicating sequence entries in Fastq file: /Users/magdalenapolder/Documents/examensarbete/Burkina_faso copy/VK3_44_sub_R2_filtered.fq.gz
Encountered 17732 unique sequences from 51847 total sequences read.
Dereplicating sequence entries in Fastq file: /Users/magdalenapolder/Documents/examensarbete/Burkina_faso copy/VK3_45_sub_R2_filtered.fq.gz
Encountered 12753 unique sequences from 50871 total sequences read.
Dereplicating sequence entries in Fastq file: /Users/magdalenapolder/Documents/examensarbete/Burkina_faso copy/VK3_47_sub_R2_filtered.fq.gz
Encountered 4554 unique sequences from 10022 total sequences read.
Dereplicating sequence entries in Fastq file: /Users/magdalenapolder/Documents/examensarbete/Burkina_faso copy/VK3_48_sub_R2_filtered.fq.gz
Encountered 9703 unique sequences from 28075 total sequences read.
Dereplicating sequence entries in Fastq file: /Users/magdalenapolder/Documents/examensarbete/Burkina_faso copy/VK3_49_sub_R2_filtered.fq.gz
Encountered 3883 unique sequences from 7961 total sequences read.
Dereplicating sequence entries in Fastq file: /Users/magdalenapolder/Documents/examensarbete/Burkina_faso copy/VK3_50_sub_R2_filtered.fq.gz
Encountered 30279 unique sequences from 78392 total sequences read.
Dereplicating sequence entries in Fastq file: /Users/magdalenapolder/Documents/examensarbete/Burkina_faso copy/VK3_51-55_sub_R2_filtered.fq.gz
Encountered 7704 unique sequences from 17480 total sequences read.
Dereplicating sequence entries in Fastq file: /Users/magdalenapolder/Documents/examensarbete/Burkina_faso copy/VK3_56-60_sub_R2_filtered.fq.gz
Encountered 3504 unique sequences from 8601 total sequences read.
Dereplicating sequence entries in Fastq file: /Users/magdalenapolder/Documents/examensarbete/Burkina_faso copy/VK5_1_sub_R2_filtered.fq.gz
Encountered 15509 unique sequences from 40480 total sequences read.
Dereplicating sequence entries in Fastq file: /Users/magdalenapolder/Documents/examensarbete/Burkina_faso copy/VK5_10_sub_R2_filtered.fq.gz
Encountered 2606 unique sequences from 6681 total sequences read.
Dereplicating sequence entries in Fastq file: /Users/magdalenapolder/Documents/examensarbete/Burkina_faso copy/VK5_11-15_sub_R2_filtered.fq.gz
Encountered 6141 unique sequences from 16439 total sequences read.
Dereplicating sequence entries in Fastq file: /Users/magdalenapolder/Documents/examensarbete/Burkina_faso copy/VK5_16-20_sub_R2_filtered.fq.gz
Encountered 4943 unique sequences from 11066 total sequences read.
Dereplicating sequence entries in Fastq file: /Users/magdalenapolder/Documents/examensarbete/Burkina_faso copy/VK5_2_sub_R2_filtered.fq.gz
Encountered 3810 unique sequences from 11148 total sequences read.
Dereplicating sequence entries in Fastq file: /Users/magdalenapolder/Documents/examensarbete/Burkina_faso copy/VK5_3_sub_R2_filtered.fq.gz
Encountered 7002 unique sequences from 21942 total sequences read.
Dereplicating sequence entries in Fastq file: /Users/magdalenapolder/Documents/examensarbete/Burkina_faso copy/VK5_4_sub_R2_filtered.fq.gz
Encountered 3930 unique sequences from 11669 total sequences read.
Dereplicating sequence entries in Fastq file: /Users/magdalenapolder/Documents/examensarbete/Burkina_faso copy/VK5_5_sub_R2_filtered.fq.gz
Encountered 7418 unique sequences from 20400 total sequences read.
Dereplicating sequence entries in Fastq file: /Users/magdalenapolder/Documents/examensarbete/Burkina_faso copy/VK5_6_sub_R2_filtered.fq.gz
Encountered 7773 unique sequences from 24633 total sequences read.
Dereplicating sequence entries in Fastq file: /Users/magdalenapolder/Documents/examensarbete/Burkina_faso copy/VK5_7_sub_R2_filtered.fq.gz
Encountered 11735 unique sequences from 36379 total sequences read.
Dereplicating sequence entries in Fastq file: /Users/magdalenapolder/Documents/examensarbete/Burkina_faso copy/VK5_8_sub_R2_filtered.fq.gz
Encountered 6141 unique sequences from 14496 total sequences read.
Dereplicating sequence entries in Fastq file: /Users/magdalenapolder/Documents/examensarbete/Burkina_faso copy/VK5_9_sub_R2_filtered.fq.gz
Encountered 4640 unique sequences from 11331 total sequences read.
Dereplicating sequence entries in Fastq file: /Users/magdalenapolder/Documents/examensarbete/Burkina_faso copy/VK7_21_sub_R2_filtered.fq.gz
Encountered 3079 unique sequences from 8486 total sequences read.
Dereplicating sequence entries in Fastq file: /Users/magdalenapolder/Documents/examensarbete/Burkina_faso copy/VK7_22_sub_R2_filtered.fq.gz
Encountered 7150 unique sequences from 20891 total sequences read.
Dereplicating sequence entries in Fastq file: /Users/magdalenapolder/Documents/examensarbete/Burkina_faso copy/VK7_23_sub_R2_filtered.fq.gz
Encountered 2986 unique sequences from 7062 total sequences read.
Dereplicating sequence entries in Fastq file: /Users/magdalenapolder/Documents/examensarbete/Burkina_faso copy/VK7_24_sub_R2_filtered.fq.gz
Encountered 17301 unique sequences from 50626 total sequences read.
Dereplicating sequence entries in Fastq file: /Users/magdalenapolder/Documents/examensarbete/Burkina_faso copy/VK7_25_sub_R2_filtered.fq.gz
Encountered 1 unique sequences from 1 total sequences read.
Dereplicating sequence entries in Fastq file: /Users/magdalenapolder/Documents/examensarbete/Burkina_faso copy/VK7_26_sub_R2_filtered.fq.gz
Encountered 11535 unique sequences from 25160 total sequences read.
Dereplicating sequence entries in Fastq file: /Users/magdalenapolder/Documents/examensarbete/Burkina_faso copy/VK7_27_sub_R2_filtered.fq.gz
Encountered 3374 unique sequences from 6829 total sequences read.
Dereplicating sequence entries in Fastq file: /Users/magdalenapolder/Documents/examensarbete/Burkina_faso copy/VK7_28_sub_R2_filtered.fq.gz
Encountered 6797 unique sequences from 18632 total sequences read.
Dereplicating sequence entries in Fastq file: /Users/magdalenapolder/Documents/examensarbete/Burkina_faso copy/VK7_29_sub_R2_filtered.fq.gz
Encountered 1809 unique sequences from 3521 total sequences read.
Dereplicating sequence entries in Fastq file: /Users/magdalenapolder/Documents/examensarbete/Burkina_faso copy/VK7_30_sub_R2_filtered.fq.gz
Encountered 8770 unique sequences from 18075 total sequences read.
Dereplicating sequence entries in Fastq file: /Users/magdalenapolder/Documents/examensarbete/Burkina_faso copy/VK7_31-35_sub_R2_filtered.fq.gz
Encountered 19668 unique sequences from 52833 total sequences read.
Dereplicating sequence entries in Fastq file: /Users/magdalenapolder/Documents/examensarbete/Burkina_faso copy/VK7_36-40_sub_R2_filtered.fq.gz
Encountered 5927 unique sequences from 12305 total sequences read.
names(derep_reverse) <- samples
dada_forward <- dada(derep_forward, err=err_forward_reads, pool="pseudo")
Sample 1 - 15798 reads in 5183 unique sequences.
Sample 2 - 16222 reads in 4067 unique sequences.
Sample 3 - 19130 reads in 7100 unique sequences.
Sample 4 - 51847 reads in 15112 unique sequences.
Sample 5 - 50871 reads in 13515 unique sequences.
Sample 6 - 10022 reads in 3809 unique sequences.
Sample 7 - 28075 reads in 7367 unique sequences.
Sample 8 - 7961 reads in 3310 unique sequences.
Sample 9 - 78392 reads in 24066 unique sequences.
Sample 10 - 17480 reads in 7136 unique sequences.
Sample 11 - 8601 reads in 3248 unique sequences.
Sample 12 - 40480 reads in 11280 unique sequences.
Sample 13 - 6681 reads in 2247 unique sequences.
Sample 14 - 16439 reads in 4967 unique sequences.
Sample 15 - 11066 reads in 4727 unique sequences.
Sample 16 - 11148 reads in 3199 unique sequences.
Sample 17 - 21942 reads in 5959 unique sequences.
Sample 18 - 11669 reads in 3370 unique sequences.
Sample 19 - 20400 reads in 6484 unique sequences.
Sample 20 - 24633 reads in 7033 unique sequences.
Sample 21 - 36379 reads in 9786 unique sequences.
Sample 22 - 14496 reads in 5382 unique sequences.
Sample 23 - 11331 reads in 3770 unique sequences.
Sample 24 - 8486 reads in 2474 unique sequences.
Sample 25 - 20891 reads in 5609 unique sequences.
Sample 26 - 7062 reads in 2343 unique sequences.
Sample 27 - 50626 reads in 16382 unique sequences.
Sample 28 - 1 reads in 1 unique sequences.
Sample 29 - 25160 reads in 10103 unique sequences.
Sample 30 - 6829 reads in 2941 unique sequences.
Sample 31 - 18632 reads in 6182 unique sequences.
Sample 32 - 3521 reads in 1708 unique sequences.
Sample 33 - 18075 reads in 7636 unique sequences.
Sample 34 - 52833 reads in 20309 unique sequences.
Sample 35 - 12305 reads in 5787 unique sequences.
selfConsist step 2
dada_reverse <- dada(derep_reverse, err=err_reverse_reads, pool="pseudo")
Sample 1 - 15798 reads in 5898 unique sequences.
Sample 2 - 16222 reads in 5083 unique sequences.
Sample 3 - 19130 reads in 7588 unique sequences.
Sample 4 - 51847 reads in 17732 unique sequences.
Sample 5 - 50871 reads in 12753 unique sequences.
Sample 6 - 10022 reads in 4554 unique sequences.
Sample 7 - 28075 reads in 9703 unique sequences.
Sample 8 - 7961 reads in 3883 unique sequences.
Sample 9 - 78392 reads in 30279 unique sequences.
Sample 10 - 17480 reads in 7704 unique sequences.
Sample 11 - 8601 reads in 3504 unique sequences.
Sample 12 - 40480 reads in 15509 unique sequences.
Sample 13 - 6681 reads in 2606 unique sequences.
Sample 14 - 16439 reads in 6141 unique sequences.
Sample 15 - 11066 reads in 4943 unique sequences.
Sample 16 - 11148 reads in 3810 unique sequences.
Sample 17 - 21942 reads in 7002 unique sequences.
Sample 18 - 11669 reads in 3930 unique sequences.
Sample 19 - 20400 reads in 7418 unique sequences.
Sample 20 - 24633 reads in 7773 unique sequences.
Sample 21 - 36379 reads in 11735 unique sequences.
Sample 22 - 14496 reads in 6141 unique sequences.
Sample 23 - 11331 reads in 4640 unique sequences.
Sample 24 - 8486 reads in 3079 unique sequences.
Sample 25 - 20891 reads in 7150 unique sequences.
Sample 26 - 7062 reads in 2986 unique sequences.
Sample 27 - 50626 reads in 17301 unique sequences.
Sample 28 - 1 reads in 1 unique sequences.
Sample 29 - 25160 reads in 11535 unique sequences.
Sample 30 - 6829 reads in 3374 unique sequences.
Sample 31 - 18632 reads in 6797 unique sequences.
Sample 32 - 3521 reads in 1809 unique sequences.
Sample 33 - 18075 reads in 8770 unique sequences.
Sample 34 - 52833 reads in 19668 unique sequences.
Sample 35 - 12305 reads in 5927 unique sequences.
selfConsist step 2
merged_amplicons <- mergePairs(dada_forward, derep_forward, dada_reverse,
derep_reverse, trimOverhang=TRUE)
Duplicate sequences in merged output.
Duplicate sequences in merged output.
Duplicate sequences in merged output.
Duplicate sequences in merged output.
Duplicate sequences in merged output.
Duplicate sequences in merged output.
Duplicate sequences in merged output.
Duplicate sequences in merged output.
Duplicate sequences in merged output.
Duplicate sequences in merged output.
Duplicate sequences in merged output.
Duplicate sequences in merged output.
Duplicate sequences in merged output.
Duplicate sequences in merged output.
Duplicate sequences in merged output.
Duplicate sequences in merged output.
Duplicate sequences in merged output.
Duplicate sequences in merged output.
Duplicate sequences in merged output.
Duplicate sequences in merged output.
Duplicate sequences in merged output.
Duplicate sequences in merged output.
Duplicate sequences in merged output.
Duplicate sequences in merged output.
Duplicate sequences in merged output.
Duplicate sequences in merged output.
Duplicate sequences in merged output.
Create Sequence Table
seqtab <- makeSequenceTable(merged_amplicons)
Duplicate sequences detected and merged.
Duplicate sequences detected and merged.
Duplicate sequences detected and merged.
Duplicate sequences detected and merged.
Duplicate sequences detected and merged.
Duplicate sequences detected and merged.
Duplicate sequences detected and merged.
Duplicate sequences detected and merged.
Duplicate sequences detected and merged.
Duplicate sequences detected and merged.
Duplicate sequences detected and merged.
Duplicate sequences detected and merged.
Duplicate sequences detected and merged.
Duplicate sequences detected and merged.
Duplicate sequences detected and merged.
Duplicate sequences detected and merged.
Duplicate sequences detected and merged.
Duplicate sequences detected and merged.
Duplicate sequences detected and merged.
Duplicate sequences detected and merged.
Duplicate sequences detected and merged.
Duplicate sequences detected and merged.
Duplicate sequences detected and merged.
Duplicate sequences detected and merged.
Duplicate sequences detected and merged.
Duplicate sequences detected and merged.
Duplicate sequences detected and merged.
dim(seqtab)
[1] 35 7826
seqtab.nochim <- removeBimeraDenovo(seqtab, verbose=T)
Identified 1385 bimeras out of 7826 input sequences.
sum(seqtab.nochim)/sum(seqtab)
[1] 0.9814065
Generate a read track table showing the amount of reads remaining at each step of the processing. Creates and writes a tab separated file.
# set a little function
getN <- function(x) sum(getUniques(x))
# making a little table
summary_tab <- data.frame(row.names=samples, dada2_input=filtered_out[,1],
filtered=filtered_out[,2], dada_f=sapply(dada_forward, getN),
dada_r=sapply(dada_reverse, getN), merged=sapply(merged_amplicons, getN),
nonchim=rowSums(seqtab.nochim),
final_perc_reads_retained=round(rowSums(seqtab.nochim)/filtered_out[,1]*100, 1))
Duplicate sequences detected and merged.
Duplicate sequences detected and merged.
Duplicate sequences detected and merged.
Duplicate sequences detected and merged.
Duplicate sequences detected and merged.
Duplicate sequences detected and merged.
Duplicate sequences detected and merged.
Duplicate sequences detected and merged.
Duplicate sequences detected and merged.
Duplicate sequences detected and merged.
Duplicate sequences detected and merged.
Duplicate sequences detected and merged.
Duplicate sequences detected and merged.
Duplicate sequences detected and merged.
Duplicate sequences detected and merged.
Duplicate sequences detected and merged.
Duplicate sequences detected and merged.
Duplicate sequences detected and merged.
Duplicate sequences detected and merged.
Duplicate sequences detected and merged.
Duplicate sequences detected and merged.
Duplicate sequences detected and merged.
Duplicate sequences detected and merged.
Duplicate sequences detected and merged.
Duplicate sequences detected and merged.
Duplicate sequences detected and merged.
Duplicate sequences detected and merged.
summary_tab
write.table(summary_tab, "read-count-tracking-new.tsv", quote=FALSE, sep="\t", col.names=NA)
Perform taxonomy classification. First download and Load SILVA v138 reference data. Also load the DECIPHER package used for classifying.
## downloading DECIPHER-formatted SILVA v138 reference
download.file(url="http://www2.decipher.codes/Classification/TrainingSets/SILVA_SSU_r138_2019.RData", destfile="SILVA_SSU_r138_2019.RData")
trying URL 'http://www2.decipher.codes/Classification/TrainingSets/SILVA_SSU_r138_2019.RData'
Content type 'unknown' length 151849504 bytes (144.8 MB)
==================================================
downloaded 144.8 MB
## loading reference taxonomy object
load("SILVA_SSU_r138_2019.RData")
library(DECIPHER)
## creating DNAStringSet object of our ASVs
dna <- DNAStringSet(getSequences(seqtab.nochim))
## and classifying
tax_info <- IdTaxa(test=dna, trainingSet=trainingSet, strand="both", processors=NULL)
|
| | 0%
|
| | 1%
|
|= | 1%
|
|= | 2%
|
|== | 2%
|
|== | 3%
|
|=== | 3%
|
|=== | 4%
|
|==== | 4%
|
|==== | 5%
|
|===== | 5%
|
|===== | 6%
|
|====== | 6%
|
|====== | 7%
|
|====== | 8%
|
|======= | 8%
|
|======= | 9%
|
|======== | 9%
|
|======== | 10%
|
|========= | 10%
|
|========= | 11%
|
|========== | 11%
|
|========== | 12%
|
|=========== | 12%
|
|=========== | 13%
|
|============ | 13%
|
|============ | 14%
|
|============ | 15%
|
|============= | 15%
|
|============= | 16%
|
|============== | 16%
|
|============== | 17%
|
|=============== | 17%
|
|=============== | 18%
|
|================ | 18%
|
|================ | 19%
|
|================= | 19%
|
|================= | 20%
|
|================== | 20%
|
|================== | 21%
|
|================== | 22%
|
|=================== | 22%
|
|=================== | 23%
|
|==================== | 23%
|
|==================== | 24%
|
|===================== | 24%
|
|===================== | 25%
|
|====================== | 25%
|
|====================== | 26%
|
|======================= | 26%
|
|======================= | 27%
|
|======================== | 27%
|
|======================== | 28%
|
|========================= | 28%
|
|========================= | 29%
|
|========================= | 30%
|
|========================== | 30%
|
|========================== | 31%
|
|=========================== | 31%
|
|=========================== | 32%
|
|============================ | 32%
|
|============================ | 33%
|
|============================= | 33%
|
|============================= | 34%
|
|============================== | 34%
|
|============================== | 35%
|
|=============================== | 35%
|
|=============================== | 36%
|
|=============================== | 37%
|
|================================ | 37%
|
|================================ | 38%
|
|================================= | 38%
|
|================================= | 39%
|
|================================== | 39%
|
|================================== | 40%
|
|=================================== | 40%
|
|=================================== | 41%
|
|==================================== | 41%
|
|==================================== | 42%
|
|===================================== | 42%
|
|===================================== | 43%
|
|===================================== | 44%
|
|====================================== | 44%
|
|====================================== | 45%
|
|======================================= | 45%
|
|======================================= | 46%
|
|======================================== | 46%
|
|======================================== | 47%
|
|========================================= | 47%
|
|========================================= | 48%
|
|========================================== | 48%
|
|========================================== | 49%
|
|=========================================== | 49%
|
|=========================================== | 50%
|
|=========================================== | 51%
|
|============================================ | 51%
|
|============================================ | 52%
|
|============================================= | 52%
|
|============================================= | 53%
|
|============================================== | 53%
|
|============================================== | 54%
|
|=============================================== | 54%
|
|=============================================== | 55%
|
|================================================ | 55%
|
|================================================ | 56%
|
|================================================= | 56%
|
|================================================= | 57%
|
|================================================= | 58%
|
|================================================== | 58%
|
|================================================== | 59%
|
|=================================================== | 59%
|
|=================================================== | 60%
|
|==================================================== | 60%
|
|==================================================== | 61%
|
|===================================================== | 61%
|
|===================================================== | 62%
|
|====================================================== | 62%
|
|====================================================== | 63%
|
|======================================================= | 63%
|
|======================================================= | 64%
|
|======================================================= | 65%
|
|======================================================== | 65%
|
|======================================================== | 66%
|
|========================================================= | 66%
|
|========================================================= | 67%
|
|========================================================== | 67%
|
|========================================================== | 68%
|
|=========================================================== | 68%
|
|=========================================================== | 69%
|
|============================================================ | 69%
|
|============================================================ | 70%
|
|============================================================= | 70%
|
|============================================================= | 71%
|
|============================================================= | 72%
|
|============================================================== | 72%
|
|============================================================== | 73%
|
|=============================================================== | 73%
|
|=============================================================== | 74%
|
|================================================================ | 74%
|
|================================================================ | 75%
|
|================================================================= | 75%
|
|================================================================= | 76%
|
|================================================================== | 76%
|
|================================================================== | 77%
|
|=================================================================== | 77%
|
|=================================================================== | 78%
|
|==================================================================== | 78%
|
|==================================================================== | 79%
|
|==================================================================== | 80%
|
|===================================================================== | 80%
|
|===================================================================== | 81%
|
|====================================================================== | 81%
|
|====================================================================== | 82%
|
|======================================================================= | 82%
|
|======================================================================= | 83%
|
|======================================================================== | 83%
|
|======================================================================== | 84%
|
|========================================================================= | 84%
|
|========================================================================= | 85%
|
|========================================================================== | 85%
|
|========================================================================== | 86%
|
|========================================================================== | 87%
|
|=========================================================================== | 87%
|
|=========================================================================== | 88%
|
|============================================================================ | 88%
|
|============================================================================ | 89%
|
|============================================================================= | 89%
|
|============================================================================= | 90%
|
|============================================================================== | 90%
|
|============================================================================== | 91%
|
|=============================================================================== | 91%
|
|=============================================================================== | 92%
|
|================================================================================ | 92%
|
|================================================================================ | 93%
|
|================================================================================ | 94%
|
|================================================================================= | 94%
|
|================================================================================= | 95%
|
|================================================================================== | 95%
|
|================================================================================== | 96%
|
|=================================================================================== | 96%
|
|=================================================================================== | 97%
|
|==================================================================================== | 97%
|
|==================================================================================== | 98%
|
|===================================================================================== | 98%
|
|===================================================================================== | 99%
|
|======================================================================================| 99%
|
|======================================================================================| 100%
Time difference of 12812.04 secs
Adjusting some of the tables and writing them out to save them.
# giving our seq headers more manageable names (ASV_1, ASV_2...)
asv_seqs <- colnames(seqtab.nochim)
asv_headers <- vector(dim(seqtab.nochim)[2], mode="character")
for (i in 1:dim(seqtab.nochim)[2]) {
asv_headers[i] <- paste(">ASV", i, sep="_")
}
# making and writing out a fasta of our final ASV seqs:
asv_fasta <- c(rbind(asv_headers, asv_seqs))
write(asv_fasta, "ASVs.fa")
# count table:
asv_tab <- t(seqtab.nochim)
row.names(asv_tab) <- sub(">", "", asv_headers)
write.table(asv_tab, "ASVs_counts.tsv", sep="\t", quote=F, col.names=NA)
# tax table:
# creating table of taxonomy and setting any that are unclassified as "NA"
ranks <- c("domain", "phylum", "class", "order", "family", "genus", "species")
asv_tax <- t(sapply(tax_info, function(x) {
m <- match(ranks, x$rank)
taxa <- x$taxon[m]
taxa[startsWith(taxa, "unclassified_")] <- NA
taxa
}))
colnames(asv_tax) <- ranks
rownames(asv_tax) <- gsub(pattern=">", replacement="", x=asv_headers)
write.table(asv_tax, "ASVs_taxonomy.tsv", sep = "\t", quote=F, col.names=NA)
Look at the spread of read length of the different ASVs. Remove any ASVs that doesn’t have an assigned taxonomy on domain level.
##Create histogran showing spread of ASV lengths
library(ggplot2)
asv_list <- as.list(asv_seqs)
asv_length_data <- data.frame(nchar(asv_list))
ggplot(asv_length_data, aes(x=nchar.asv_list.)) +
geom_histogram(binwidth=.5, colour="black", fill="white")
##Remove ASVs that are "NA" on Domain level
asv_tax_df_filtered <- data.frame(asv_tax)[!is.na(data.frame(asv_tax)$"domain"),]
##Creates binary list of which ASVs to keep
asv_to_keep <- !is.na(data.frame(asv_tax)$"domain")
##Removes rows with removed ASVs
asv_seqs_filtered <- asv_seqs[asv_to_keep]
count_tab_filtered <- asv_tab[asv_to_keep,]
asv_list_filtered <- as.list(asv_seqs_filtered)
asv_length_data_filtered <- data.frame(nchar(asv_list_filtered))
ggplot(asv_length_data_filtered, aes(x=nchar.asv_list_filtered.)) +
geom_histogram(binwidth=.5, colour="black", fill="white")
Then any unwanted samples are removed. For the Burkina Faso paper I’m removing sample VK7_25, as it only contains one single read after previous filtering steps.
count_tab <- count_tab_filtered[ , -28]
sample_info_tab <- read.table("/Users/magdalenapolder/Documents/examensarbete/scripting/sample_info.tsv", header=T, row.names=1, check.names=F, sep="\t")[-28, ]
sample_info_tab$color <- as.character(sample_info_tab$color)
Generate a rarefaction curve
library(vegan)
rarecurve(t(count_tab_filtered), step=100, col=sample_info_tab$color, lwd=2, ylab="ASVs", label=F)
Rarify and create phyloseq object
library(phyloseq)
count_rarified <- rrarefy(t(count_tab),2000)
rare_count_phy <- otu_table(t(count_rarified), taxa_are_rows=T)
sample_info_tab_phy <- sample_data(sample_info_tab)
rare_physeq <- phyloseq(rare_count_phy,sample_info_tab_phy)
Plot hierarchial clustering
library(dendextend)
euc_dist <- vegdist(method = "bray",count_rarified)
euc_clust <- hclust(euc_dist, method="ward.D2")
euc_dend <- as.dendrogram(euc_clust, hang=0.1)
dend_cols <- as.character(sample_info_tab$color[order.dendrogram(euc_dend)])
labels_colors(euc_dend) <- dend_cols
plot(euc_dend, ylab="VST Euc. dist.")
Create Ordination plot
library(tidyverse)
vst_pcoa <- ordinate(rare_physeq, method="NMDS", distance="bray")
Square root transformation
Wisconsin double standardization
Run 0 stress 0.191645
Run 1 stress 0.1916256
... New best solution
... Procrustes: rmse 0.01207685 max resid 0.05015089
Run 2 stress 0.1945053
Run 3 stress 0.1916256
... New best solution
... Procrustes: rmse 2.369718e-05 max resid 0.0001026173
... Similar to previous best
Run 4 stress 0.1916256
... Procrustes: rmse 1.608221e-05 max resid 6.936579e-05
... Similar to previous best
Run 5 stress 0.2186031
Run 6 stress 0.2040113
Run 7 stress 0.2298448
Run 8 stress 0.2293373
Run 9 stress 0.1946911
Run 10 stress 0.2210592
Run 11 stress 0.2191235
Run 12 stress 0.2195813
Run 13 stress 0.2042307
Run 14 stress 0.2115282
Run 15 stress 0.2118455
Run 16 stress 0.2320056
Run 17 stress 0.2349141
Run 18 stress 0.2249281
Run 19 stress 0.1945053
Run 20 stress 0.2135648
*** Best solution repeated 2 times
plot_ordination(rare_physeq, vst_pcoa, color="location", shape="type") +
geom_point(size=1) + labs(col="location") +
geom_text(aes(label=rownames(sample_info_tab), hjust=0.3, vjust=-0.4), size=3)
ggsave("/Users/magdalenapolder/Documents/examensarbete/Burkina_faso copy/ordination_plots/1000/1000_4.png")
Saving 7.29 x 4.51 in image
dist_matrix <- metaMDSdist(count_rarified)
Square root transformation
Wisconsin double standardization
ordinate_data <- metaMDS(dist_matrix)
Run 0 stress 0.191645
Run 1 stress 0.2269568
Run 2 stress 0.2357097
Run 3 stress 0.2437579
Run 4 stress 0.2155788
Run 5 stress 0.2042798
Run 6 stress 0.2155515
Run 7 stress 0.228156
Run 8 stress 0.2042798
Run 9 stress 0.2203243
Run 10 stress 0.2287959
Run 11 stress 0.2311873
Run 12 stress 0.1916256
... New best solution
... Procrustes: rmse 0.01207818 max resid 0.05016392
Run 13 stress 0.2371694
Run 14 stress 0.2203062
Run 15 stress 0.2319097
Run 16 stress 0.191645
... Procrustes: rmse 0.01208664 max resid 0.05025614
Run 17 stress 0.2041104
Run 18 stress 0.1945053
Run 19 stress 0.1945053
Run 20 stress 0.2042798
*** Best solution was not repeated -- monoMDS stopping criteria:
1: no. of iterations >= maxit
17: stress ratio > sratmax
2: scale factor of the gradient < sfgrmin
half_ordinate <- metaMDSiter(dist_matrix)
Run 0 stress 0.191645
Run 1 stress 0.191645
... New best solution
... Procrustes: rmse 5.936795e-05 max resid 0.0002542999
... Similar to previous best
Run 2 stress 0.2333242
Run 3 stress 0.2155788
Run 4 stress 0.2277866
Run 5 stress 0.2382804
Run 6 stress 0.219766
Run 7 stress 0.2223616
Run 8 stress 0.2327378
Run 9 stress 0.1916256
... New best solution
... Procrustes: rmse 0.01208187 max resid 0.05018994
Run 10 stress 0.1945053
Run 11 stress 0.2223926
Run 12 stress 0.1916256
... Procrustes: rmse 4.104447e-06 max resid 1.508019e-05
... Similar to previous best
Run 13 stress 0.2277739
Run 14 stress 0.2242054
Run 15 stress 0.2287849
Run 16 stress 0.1916256
... Procrustes: rmse 1.001405e-05 max resid 2.311494e-05
... Similar to previous best
Run 17 stress 0.236222
Run 18 stress 0.2298448
Run 19 stress 0.2291063
Run 20 stress 0.216003
*** Best solution repeated 2 times
plot_ordination(rare_physeq, ordinate_data, color="location", shape="type") +
geom_point(size=1) + labs(col="location") +
geom_text(aes(label=rownames(sample_info_tab), hjust=0.3, vjust=-0.4), size=3)
Alpha diversity
count_tab_phy <- otu_table(count_tab_filtered, taxa_are_rows=T)
tax_tab <- asv_tax[!is.na(data.frame(asv_tax)$"domain"),]
tax_tab_phy <- tax_table(tax_tab)
ASV_physeq <- phyloseq(count_tab_phy, tax_tab_phy, sample_info_tab_phy)
# and now we can call the plot_richness() function on our phyloseq object
plot_richness(ASV_physeq, color="location", measures=c("Chao1", "Shannon")) +
scale_color_manual(values=unique(sample_info_tab$color[order(sample_info_tab$location)])) +
theme_bw() + theme(legend.title = element_blank(), axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
plot_richness(ASV_physeq, x="location", color="type", measures=c("Chao1", "Shannon")) +
scale_color_manual(values=unique(sample_info_tab$color[order(sample_info_tab$location)])) +
theme_bw() + theme(legend.title = element_blank(), axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
indices <- c("shannon", "faith")
names <- c("Shannon_diversity","Faith_diversity")
#calculate indices
tse <- estimate_richness(ASV_physeq)
tse$location <- sample_info_tab$location
ggplot(tse, aes(x = location, y = Shannon, color = location)) + geom_boxplot()
NA
NA
NA
ordinate_data[["dist"]]
[1] -0.09281161 -0.10193725 -0.13670718 -0.15259760 -0.13985099 -0.02962592 -0.11109422
[8] -0.06954377 -0.09791743 -0.22475498 -0.15565570 -0.15409021 -0.17596339 -0.03582033
[15] -0.16517080 -0.11827076 -0.16834382 -0.11967518 -0.04330211 -0.17604207 -0.08168016
[22] -0.09781192 -0.15439196 -0.07286335 -0.07650653 -0.14929482 -0.06110163 -0.08528240
[29] -0.15002720 -0.23058729 -0.12499490 -0.09016489 -0.04854012 -0.11088407 -0.23312946
[36] -0.15252417 -0.02621555 -0.05617870 -0.05432640 -0.09735404 -0.09181446 -0.08080807
[43] -0.10901591 -0.09868725 -0.09718686 -0.17221427 -0.15432413 -0.12484085 -0.17082844
[50] -0.16667937 -0.20281269 -0.18448366 -0.13079872 -0.24510700 -0.12879330 -0.12261658
[57] -0.21051047 -0.23020756 -0.13630459 -0.09818634 -0.16371609 -0.23306476 -0.11149708
[64] -0.30519274 -0.20697627 -0.13447327 -0.06277337 -0.12454856 -0.12730503 -0.22124553
[71] -0.16792193 -0.24641704 -0.11680956 -0.21096445 -0.21121701 -0.24848564 -0.15295613
[78] -0.29964611 -0.18972047 -0.14969726 -0.18236446 -0.09981440 -0.17205406 -0.09503488
[85] -0.22258493 -0.20876766 -0.26939764 -0.14940354 -0.30665285 -0.24765294 -0.09849378
[92] -0.25403222 -0.17017698 -0.23439579 -0.21018417 -0.12339047 -0.20521643 -0.09721714
[99] -0.27346945 -0.49308124 -0.19030858 -0.25548687 -0.10212028 -0.11892443 -0.23112172
[106] -0.23133693 -0.39224650 -0.28442019 -0.29867283 -0.22252669 -0.22818134 -0.34158922
[113] -0.24301136 -0.15331349 -0.34778987 -0.27273400 -0.30842570 -0.28954940 -0.39179475
[120] -0.24288122 -0.38297570 -0.34538046 -0.45936685 -0.28365705 -0.36443502 -0.28217774
[127] -0.29771365 -0.31500115 -0.39783731 -0.35057814 -0.17569924 -0.38478807 -0.29954570
[134] -0.26214372 -0.35247382 -0.21754210 -0.18198581 -0.22279226 -0.33475697 -0.18629762
[141] -0.36464550 -0.34482599 -0.39620779 -0.23123484 -0.29241203 -0.29401935 -0.38172154
[148] -0.29430690 -0.42964657 -0.25057773 -0.40666203 -0.21209455 -0.10407222 -0.53921582
[155] -0.46431762 -0.31493791 -0.39520992 -0.38678300 -0.25490177 -0.31286889 -0.44542399
[162] -0.38488945 -0.38467097 -0.50975709 -0.30073805 -0.34979397 -0.28729287 -0.36703516
[169] -0.35996171 -0.42087192 -0.38654053 -0.26151757 -0.47630179 -0.42543936 -0.22274511
[176] -0.27565005 -0.40357546 -0.24664689 -0.32652122 -0.24141891 -0.26228558 -0.39111601
[183] -0.23776300 -0.39082348 -0.14897364 -0.41824198 -0.35201896 -0.35654128 -0.36930501
[190] -0.43043954 -0.36321732 -0.30896773 -0.17306667 -0.28846941 -0.08706841 -0.15137823
[197] -0.38677999 -0.13256227 -0.60684487 -0.48009145 -0.40974489 -0.52679795 -0.27151700
[204] -0.20481734 -0.28982708 -0.42466358 -0.44258714 -0.33364856 -0.40021411 -0.39479388
[211] -0.50559166 -0.41082816 -0.76210199 -0.31568190 -0.39075747 -0.50474458 -0.38257946
[218] -0.53022976 -0.14870299 -0.28332803 -0.49726422 -0.30753490 -0.47221407 -0.41448000
[225] -0.62095308 -0.31160528 -0.45552544 -0.44207160 -0.42209036 -0.25210169 -0.35653292
[232] -0.40200910 -0.41521558 -0.16907293 -0.40072693 -0.37005626 -0.40456523 -0.49105111
[239] -0.37618573 -0.38929129 -0.36362772 -0.32989249 -0.27807112 -0.36319234 -0.36522949
[246] -0.44488272 -0.22528064 -0.37724387 -0.49087402 -0.12054063 -0.38921086 -0.32641953
[253] -0.60362643 -0.43071110 -0.51781130 -0.34782354 -0.43609117 -0.39873823 -0.46176255
[260] -0.37118764 -0.41066884 -0.50652267 -0.46522766 -0.49374221 -0.27589024 -0.34518847
[267] -0.37494218 -0.48778595 -0.46901324 -0.52066180 -0.44874704 -0.42148770 -0.35935067
[274] -0.38246950 -0.50090375 -0.43003791 -0.35574545 -0.32483933 -0.36055201 -0.40856535
[281] -0.50282562 -0.46387696 -0.36518428 -0.38482022 -0.58299424 -0.49290917 -0.48742221
[288] -0.62272301 -0.45862664 -0.50708252 -0.50196816 -0.28746024 -0.48570486 -0.47317235
[295] -0.41622431 -0.37243146 -0.46335324 -0.48159555 -0.30540738 -0.57692034 -0.63118012
[302] -0.38016248 -0.37665120 -0.60273992 -0.47668664 -0.43216098 -0.25939835 -0.45677465
[309] -0.61299598 -0.57348466 -0.37926911 -0.39692417 -0.35197925 -0.52722492 -0.44448616
[316] -0.37375436 -0.48441765 -0.52984292 -0.52097010 -0.48126525 -0.45419585 -0.51808935
[323] -0.50184247 -0.69461523 -0.65830433 -0.16507227 -0.47582374 -0.48802434 -0.75093248
[330] -0.25376010 -0.68254682 -0.52634789 -0.38563875 -0.29481833 -0.53033696 -0.43373550
[337] -0.58577634 -0.41507614 -0.55658799 -0.10148676 -0.48688412 -0.47498620 -0.48041139
[344] -0.51149923 -0.59008493 -0.47740206 -0.48911235 -0.47852482 -0.48282701 -0.44851387
[351] -0.61051290 -0.45642638 -0.64138875 -0.70203399 -0.38500107 -0.61332656 -0.55070890
[358] -0.57181511 -0.63600576 -0.44115728 -0.53797583 -0.49818419 -0.39523496 -0.47791113
[365] -0.42343310 -0.59409163 -0.56821046 -0.57436380 -0.49156958 -0.30353775 -0.65456908
[372] -0.61365688 -0.49750233 -0.53889396 -0.73620258 -0.67029948 -0.75543803 -0.70601270
[379] -0.51186855 -0.50530857 -0.44512164 -0.54939328 -0.59160073 -0.48426135 -0.54465489
[386] -0.49837241 -0.57198386 -0.44651631 -0.58970751 -0.77661470 -0.44889557 -0.53877421
[393] -0.65063810 -0.51426797 -0.62139449 -0.49765588 -0.26736123 -0.65309725 -0.54998890
[400] -0.60690277 -0.50779146 -0.70859165 -0.61537331 -0.52298763 -0.79939243 -0.46307469
[407] -0.47888451 -0.41872182 -0.61168512 -0.50381161 -0.61965453 -0.59892827 -0.71619410
[414] -0.35454205 -0.52724345 -0.22700798 -0.55052732 -0.64654828 -0.61869098 -0.67975884
[421] -0.72766140 -0.69826375 -0.55012695 -0.64274931 -0.57568827 -0.54732710 -0.59284089
[428] -0.43075494 -0.55330637 -0.65529151 -0.46603375 -0.60768632 -0.51394736 -0.51757038
[435] -0.44957594 -0.55994569 -0.62353968 -0.75714914 -0.56926734 -0.74217421 -0.67040785
[442] -0.70277616 -0.60163962 -0.80565607 -0.44522576 -0.67365074 -0.56522980 -0.73322112
[449] -0.59209876 -0.71077067 -0.54446326 -0.56636245 -0.82492567 -0.51184183 -0.51390725
[456] -0.41996728 -0.72475619 -0.57278543 -0.67448818 -0.84660548 -0.66031123 -0.58432658
[463] -0.63712116 -0.57330628 -0.72917686 -0.62650123 -0.54049966 -0.63739580 -0.68688633
[470] -0.74968238 -0.51557928 -0.59941471 -0.59000417 -0.50748632 -0.84141209 -0.57145183
[477] -0.52748431 -0.61212486 -0.63475595 -0.68128003 -0.61045692 -0.71436612 -0.68737405
[484] -0.68544203 -0.64854059 -0.66909176 -0.70048526 -0.56926783 -0.84114210 -0.71243700
[491] -0.51396530 -0.65687484 -0.63862340 -0.73419986 -0.55615149 -0.61563960 -0.53345184
[498] -0.84332123 -0.67864239 -0.78512181 -0.68964772 -0.63981907 -0.74814348 -0.54103734
[505] -0.80172480 -0.81923465 -0.46189862 -0.71343757 -0.51741444 -0.70033307 -0.60569275
[512] -0.79688202 -0.64619024 -0.72516531 -0.61819249 -0.59416813 -0.72637874 -0.71000548
[519] -0.82664481 -0.74963605 -0.70039952 -0.80836094 -0.64536260 -0.96347396 -0.77638682
[526] -0.72661385 -0.70971317 -0.73176863 -0.80224995 -0.69945350 -0.56637719 -0.81818397
[533] -0.93299455 -0.69461022 -0.76403844 -0.59296762 -0.69619241 -0.78793829 -0.92657754
[540] -0.72157366 -0.97397906 -0.89638270 -0.77829507 -0.66170645 -0.92497123 -0.79992052
[547] -1.06419057 -0.68428643 -0.76155370 -0.77144042 -0.92081577 -1.01293456 -0.86116746
[554] -0.79155975 -0.99701556 -0.76621733 -0.95624784 -0.96609000 -0.92157562 -0.90894809
[561] -0.92114599
half_ordinate[["dist"]]
[1] 0.27086777 0.29750391 0.39898778 0.44536983 0.40815860 0.08645803 0.32423692 0.20296428
[9] 0.28577216 0.65595905 0.45429251 0.44970945 0.51354024 0.10455201 0.48205859 0.34515997
[17] 0.49132669 0.34927020 0.12637709 0.51378927 0.23839726 0.28546052 0.45060142 0.21264579
[25] 0.22328369 0.43570983 0.17832368 0.24888796 0.43783882 0.67297603 0.36479318 0.26314577
[33] 0.14166704 0.32362101 0.68039004 0.44513105 0.07653953 0.16396787 0.15853723 0.28413169
[41] 0.26795527 0.23583044 0.31816013 0.28821005 0.28344378 0.50260987 0.45039460 0.36435514
[49] 0.49855619 0.48658708 0.59191090 0.53828109 0.38174072 0.71534298 0.37587197 0.35769464
[57] 0.61436800 0.67186429 0.39780656 0.28655584 0.47779184 0.68020356 0.32558691 0.89070939
[65] 0.60406346 0.39245112 0.18321176 0.36354774 0.37152920 0.64570662 0.48995587 0.71908170
[73] 0.34084572 0.61569012 0.61639357 0.72520778 0.44645995 0.87449995 0.55361706 0.43688990
[81] 0.53221255 0.29120954 0.50220467 0.27735128 0.64966609 0.60927971 0.78620468 0.43616757
[89] 0.89501518 0.72277278 0.28750553 0.74138187 0.49665175 0.68408214 0.61348341 0.36007006
[97] 0.59878026 0.28369599 0.79809844 1.43898413 0.55540333 0.74563817 0.29801498 0.34706568
[105] 0.67449771 0.67514099 1.14470862 0.83006922 0.87177721 0.64938937 0.66584646 0.99689717
[113] 0.70916097 0.44742598 1.01500650 0.79594079 0.90013742 0.84511808 1.14358495 0.70880768
[121] 1.11769746 1.00793152 1.34062191 0.82783675 1.06354114 0.82341849 0.86886791 0.91928017
[129] 1.16121452 1.02328803 0.51283974 1.12294088 0.87417082 0.76505813 1.02864418 0.63489589
[137] 0.53095591 0.65019693 0.97693971 0.54357379 1.06421565 1.00634855 1.15630909 0.67485663
[145] 0.85339364 0.85803472 1.11402333 0.85902137 1.25385284 0.73125735 1.18661256 0.61901751
[153] 0.30372498 1.57364461 1.35506087 0.91895956 1.15337757 1.12878274 0.74390216 0.91307885
[161] 1.29992424 1.12314916 1.12264922 1.48780093 0.87769032 1.02080845 0.83844350 1.07116925
[169] 1.05036090 1.22826317 1.12811068 0.76315124 1.39006283 1.24162402 0.65002865 0.80427781
[177] 1.17791219 0.71981567 0.95292832 0.70455123 0.76549227 1.14141865 0.69386571 1.14063582
[185] 0.43484529 1.22052038 1.02734356 1.04047131 1.07781055 1.25622078 1.06005052 0.90154783
[193] 0.50509453 0.84188686 0.25405881 0.44178499 1.12877733 0.38684123 1.77104165 1.40105986
[201] 1.19582063 1.53740985 0.79241414 0.59776109 0.84586886 1.23937165 1.29168858 0.97374080
[209] 1.16800164 1.15216157 1.47553145 1.19901204 2.22409222 0.92128783 1.14016707 1.47296478
[217] 1.11654249 1.54743428 0.43397701 0.82688595 1.45121152 0.89775281 1.37810485 1.20946679
[225] 1.81218059 0.90936730 1.32947043 1.29011066 1.23173425 0.73573765 1.04053268 1.17323799
[233] 1.21177205 0.49338220 1.16950966 1.07997060 1.18073841 1.43307484 1.09782873 1.13608926
[241] 1.06122216 0.96272716 0.81153392 1.06000070 1.06612117 1.29847280 0.65745605 1.10104636
[249] 1.43259222 0.35192643 1.13588849 0.95265279 1.76158639 1.25706995 1.51123168 1.01511160
[257] 1.27266548 1.16371770 1.34776006 1.08329879 1.19853756 1.47826827 1.35778496 1.44089692
[265] 0.80516655 1.00739907 1.09426893 1.42355124 1.36876736 1.51947627 1.30961760 1.22999386
[273] 1.04865956 1.11617715 1.46186751 1.25479250 1.03830389 0.94800983 1.05226244 1.19235916
[281] 1.46747060 1.35382703 1.06566409 1.12305253 1.70147607 1.43849143 1.42248276 1.81735904
[289] 1.33850623 1.47989605 1.46487260 0.83891891 1.41753943 1.38093476 1.21472377 1.08711729
[297] 1.35226127 1.40551372 0.89131656 1.68371892 1.84204526 1.10950143 1.09920674 1.75901335
[305] 1.39118527 1.26124806 0.75704508 1.33311224 1.78895661 1.67372734 1.10686194 1.15840481
[313] 1.02723595 1.53880338 1.29730152 1.09076421 1.41371068 1.54629948 1.52039815 1.40455340
[321] 1.32552104 1.51199179 1.46460618 2.02716295 1.92115834 0.48179183 1.38864130 1.42428074
[329] 2.19154121 0.74062837 1.99197729 1.53609065 1.12542096 0.86036484 1.54776196 1.26596969
[337] 1.70955885 1.21134345 1.62435317 0.29618063 1.42096131 1.38603153 1.40205993 1.49276744
[345] 1.72211131 1.39326358 1.42746253 1.39647166 1.40910090 1.30895690 1.78171959 1.33202835
[353] 1.87187981 2.04880317 1.12359370 1.78991831 1.60722149 1.66881879 1.85616213 1.28748169
[361] 1.57004609 1.45388529 1.15342581 1.39475808 1.23577476 1.73380318 1.65825536 1.67621382
[369] 1.43462608 0.88593628 1.91031937 1.79089093 1.45194103 1.57271115 2.14856339 1.95626495
[377] 2.20470695 2.06040734 1.49383081 1.47468895 1.29906849 1.60335172 1.72661172 1.41324712
[385] 1.58956002 1.45450199 1.66928937 1.30314023 1.72104394 2.26650502 1.31002960 1.57238654
[393] 1.89882321 1.50081813 1.81348835 1.45237101 0.78024030 1.90605568 1.60514867 1.77117761
[401] 1.48193723 2.06793740 1.79592969 1.52631489 2.33294410 1.35141350 1.39756658 1.22202163
[409] 1.78518227 1.47033722 1.80841401 1.74794425 2.09014205 1.03459079 1.53868429 0.66248682
[417] 1.60666966 1.88687765 1.80560792 1.98395146 2.12366854 2.03783655 1.60544589 1.87586159
[425] 1.68009261 1.59738155 1.73018019 1.25706325 1.61477557 1.91240639 1.36008795 1.77347131
[433] 1.49991770 1.51048806 1.31206621 1.63411856 1.81974286 2.20964273 1.66126145 2.16597127
[441] 1.95659825 2.05101594 1.75584195 2.35121320 1.29932358 1.96598965 1.64957540 2.13985164
[449] 1.72796858 2.07433185 1.58899047 1.65292003 2.40745470 1.49378707 1.49980603 1.22567987
[457] 2.11505725 1.67161648 1.96848782 2.47077075 1.92700734 1.70529839 1.85941210 1.67316737
[465] 2.12809778 1.82840540 1.57741466 1.86019307 2.00462233 2.18788443 1.50469041 1.74934056
[473] 1.72187601 1.48101546 2.45556158 1.66775975 1.53940447 1.78643502 1.85224819 1.98833164
[481] 1.78158133 2.08481908 2.00603191 2.00040398 1.89268424 1.95268810 2.04430432 1.66137951
[489] 2.45477378 2.07915879 1.49995722 1.91701941 1.86377878 2.14274614 1.62307558 1.79670802
[497] 1.55683044 2.46115251 1.98060260 2.29127979 2.01273196 1.86726747 2.18353610 1.57898534
[505] 2.33972413 2.39088230 1.34802795 2.08215351 1.51005794 2.04381027 1.76767744 2.32560241
[513] 1.88585950 2.11632608 1.80415915 1.73401896 2.11997252 2.07208351 2.41247535 2.18774530
[521] 2.04407784 2.35910731 1.88357145 2.81183827 2.26582795 2.12057506 2.07122954 2.13560427
[529] 2.34128453 2.04127532 1.65289029 2.38779165 2.72284049 2.02699049 2.22984088 1.73046858
[537] 2.03174849 2.29958655 2.70412606 2.10582873 2.84249239 2.61601044 2.27140646 1.93112523
[545] 2.69944306 2.33448504 3.10577318 1.99702160 2.22254155 2.25139601 2.68731030 2.95614744
[553] 2.51331519 2.31011379 2.90969313 2.23615250 2.79071084 2.81944066 2.68950829 2.65266239
[561] 2.68827326
#start_time <- Sys.time()
#Define Input
count_input <- count_tab
info_input <- sample_info_tab
rarefaction_threshhold <- 50
repeat_amount <- 4
#Set up working files
rarified_count <- rrarefy(t(count_input),rarefaction_threshhold)
duplicated_info <- info_input
for (x in 2:repeat_amount){
rarified_count <- rbind(rarified_count,rrarefy(t(count_input),2000))
duplicated_info <- rbind(duplicated_info, info_input)
}
rare_count_phy_repeat <- otu_table(t(rarified_count), taxa_are_rows=T)
sample_info_tab_phy_repeat <- sample_data(duplicated_info)
rare_physeq_repeat <- phyloseq(rare_count_phy_repeat,sample_info_tab_phy_repeat)
vst_pcoa_repeat <- ordinate(rare_physeq_repeat, method="NMDS", distance="bray")
Square root transformation
Wisconsin double standardization
Run 0 stress 0.229054
Run 1 stress 0.2423525
Run 2 stress 0.2373592
Run 3 stress 0.2443532
Run 4 stress 0.2340258
Run 5 stress 0.2333971
Run 6 stress 0.2522531
Run 7 stress 0.2309631
Run 8 stress 0.2447999
Run 9 stress 0.2333792
Run 10 stress 0.2516151
Run 11 stress 0.2454592
Run 12 stress 0.2423429
Run 13 stress 0.2381605
Run 14 stress 0.2419273
Run 15 stress 0.2340861
Run 16 stress 0.2325367
Run 17 stress 0.2469227
Run 18 stress 0.2334545
Run 19 stress 0.2304565
Run 20 stress 0.2321531
*** Best solution was not repeated -- monoMDS stopping criteria:
2: no. of iterations >= maxit
18: stress ratio > sratmax
plot_ordination(rare_physeq_repeat, vst_pcoa_repeat, color="location", shape="type") #+
#geom_point(size=1) + labs(col="location") +
#geom_text(aes(label=rownames(duplicated_info), hjust=0.3, vjust=-0.4), size=3)
#end_time <- Sys.time()
#run_time <- difftime(end_time,start_time, units = "secs" )
#time_data <- rbind(time_data,c(rarefaction_threshhold,repeat_amount,run_time))